clear
set more off, perm
cd /Users/zachbrown/Projects/PriceTransparency/Data/


import excel using proc_codes/radiology_proc_code_list.xls, firstrow clear
gen n=1
collapse (sum) n, by(proc_code Type)
drop n
duplicates report proc_code
encode Type, generate(proc_type)
drop Type
save proc_codes/radiology_proc_code_list.dta, replace


// Open dataset containing all CPT codes from CMS
use proc_codes/cms_cpt_codes_full.dta, clear

drop if proc_code=="0"

// Find dates when descriptions for a code change (use short description)
sort proc_code last_update
gen end_date = last_update[_n+1] if proc_code==proc_code[_n+1]
format end_date %td

replace end_date = mdy(1,1,2015) if end_date==.

encode long_description, gen(long_desc)

collapse (min) last_update (max) end_date (firstnm) long_desc,by(proc_code short_description)

label values long_desc long_desc
decode long_desc, gen(long_description)
drop long_desc

// Merge list of radiology procs
merge m:1 proc_code using proc_codes/radiology_proc_code_list.dta
gen cpt_radiology = inlist(_merge,2,3)
drop _merge

// Explore descriptions
format long_description %50s
*brow if cpt_radiology==1

// Merge on treated CPT procedure list (version 2)
merge m:1 proc_code using healthcost_procedures/Procedure_codes_HealthCost_website_cpt.dta
tab procname cpt_radiology, miss
drop if _merge==2
drop _merge

// Keep only radiology procedures
replace cpt_radiology=1 if inlist(procname,3,6,13,14,15,18,20) // Treated radiology proc
replace cpt_radiology=0 if inlist(procname,1,2,4,5,7,8,9,10,11,12,16,17,19) // Other treated procs
tab procname cpt_radiology, miss

keep if cpt_radiology==1

gen on_web = inlist(procname,3,6,13,14,15,18,20)
tab on_web

rename procname on_web_procname


// Make procedure class variable from proc_type and on_web_procname
decode proc_type, gen(proc_class_tmp)
replace proc_class_tmp = "X-RAY" if on_web_procname==20
replace proc_class_tmp = "MAMMOGRAM" if on_web_procname==14
replace proc_class_tmp = "ULTRASOUND" if on_web_procname==18
replace proc_class_tmp = "NUCLEAR" if on_web_procname==15
replace proc_class_tmp = "BONE DENSITOMETRY" if on_web_procname==3

tab proc_class_tmp on_web_procname, miss
encode proc_class_tmp, gen(proc_class)
drop proc_class_tmp

// Make procedure ID that is different for different short description
gen start_ym = ym(year(last_updated),month(last_updated))
gen end_ym = ym(year(end_date),month(end_date)) - 1
format start_ym %tm
format end_ym %tm
drop if end_ym <= ym(2004,12)

egen proc_id = group(proc_code short_description), missing 
mdesc proc_id


// Reshape so that there is one row per proc_code
keep proc_code proc_id on_web on_web_procname proc_web_name_id proc_class start_ym end_ym ///
	border_proc // For testing

bys proc_code (start_ym): gen n = _n
tab n

forvalues i = 1/5 {
	gen proc_id_`i' = proc_id if n==`i'
	gen start_ym_`i' = start_ym if n==`i'
	gen end_ym_`i' = end_ym if n==`i'
	format start_ym_`i' %tm
	format end_ym_`i' %tm
}

collapse (firstnm) proc_id_* start_ym_* end_ym_*, by(proc_code on_web on_web_procname proc_web_name_id proc_class border_proc)
order proc_code on_web proc_id_1 start_ym_1 end_ym_1 ///
	proc_id_2 start_ym_2 end_ym_2  ///
	proc_id_3 start_ym_3 end_ym_3  ///
	proc_id_4 start_ym_4 end_ym_4  ///
	proc_id_5 start_ym_5 end_ym_5 

duplicates report proc_code

compress
save build/cms_cpt_codes_radio.dta, replace
